source("../modularity_analysis_functions.R", local = knitr::knit_global())
# reading the microbiome data
# working only with Rattus from the three villages

# villages: Andatsakala, Mandena, Sarahandrano
data_asv <- read_csv("../../data/data_processed/microbiome/data_asv_rra0.001_p0.01_th5000_all.csv")

1 Exploration

# small mammals data
small_mammals <- read_csv("../../data/data_raw/data_small_mammals/Terrestrial_Mammals.csv") %>% 
  mutate(host_ID = as.numeric(gsub(".*?([0-9]+).*", "\\1", animal_id))) %>% 
  mutate(age_repro = as_factor(age_repro)) %>% 
  dplyr::rename(grid = habitat_type) %>% 
  filter(host_ID %in% data_asv$host_ID)

cat("## Abundance", '\n','\n')

1.1 Abundance

g <- small_mammals %>% 
  count(village,grid) %>% 
  ggplot(aes(x=grid, y=n)) +
  geom_bar(position="dodge", stat="identity") +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14)) +
  labs(x="Land Use", y="Small-Mammals Abundance")
print(g)

cat("## Sex ratio", '\n','\n')

1.2 Sex ratio

g <- small_mammals %>% 
  count(grid, sex) %>% 
  ggplot(aes(fill=sex, x=grid, y=n)) +
  geom_bar(position="fill", stat="identity") +
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14)) +
  labs(x="Land Use", y="Proportion")
print(g)

cat("## Age ratio", '\n','\n')

1.3 Age ratio

g <- small_mammals %>% 
  count(grid, age_repro) %>% 
  ggplot(aes(fill=age_repro, x=grid, y=n)) +
  geom_bar(position="fill", stat="identity") +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14)) +
  labs(x="Land Use", y="Proportion")
print(g)

# relative abundance of the 10 most abundant Family of each group

# microbes taxonomy
tax <- read_delim("../../data/data_raw/data_microbiome/ASVs_taxonomy_new.tsv") %>% 
  dplyr::rename(asv_ID = ASV)

data_asv_tax <- data_asv_p %>% 
  left_join(tax, by="asv_ID")

cat("## Relative abundance Family", '\n','\n')

1.4 Relative abundance Family

total_reads_groups <- data_asv_tax %>% distinct(host_ID, asv_core, total_reads) %>% group_by(asv_core) %>% summarise(n_total = sum(total_reads))
most_abu_family <- data_asv_tax %>%
  mutate(reads_a = reads*total_reads) %>%
  group_by(asv_core, Family) %>%
  summarise(n= sum(reads_a)) %>%
  left_join(total_reads_groups, by="asv_core") %>%
  mutate(n_p = n/n_total) 

  most_abu_family_8 <- most_abu_family %>% 
    filter(!is.na(Family)) %>% 
    ungroup() %>% 
    slice_max(by = asv_core, order_by = n_p, n = 8) %>% 
    mutate(p = "top") %>% 
    select(asv_core, Family, p)
  
  largest_modules <- modules_table_three_groups %>%
    group_by(asv_core, host_group) %>% 
    summarise(n = n_distinct(host_ID)) %>% 
    ungroup() %>%
    slice_max(by = asv_core, order_by = n, n = 5, with_ties = FALSE) %>% 
    mutate(p = "top")
  
  
 modules_top <- modules_table_three_groups %>% 
   left_join(largest_modules, by=c("asv_core","host_group")) %>% 
  mutate(module = factor(ifelse(is.na(p),"Other",host_group))) %>%
  distinct(host_ID,host_group,asv_core, module) 
 
 total_reads_modules <-  modules_table_three_groups %>% 
   distinct(host_ID, asv_core, host_group, total_reads)  %>% 
   left_join(modules_top %>% distinct(asv_core,host_group,module), by=c("asv_core","host_group")) %>% 
    group_by(asv_core, module) %>% summarise(n_total = sum(total_reads))
  
  most_abu_family_p <- data_asv_tax %>% left_join(modules_top, by=c("asv_core","host_ID")) %>% 
  mutate(reads_a = reads*total_reads) %>%
  group_by(asv_core, Family, module) %>%
  summarise(n= sum(reads_a)) %>%
  left_join(total_reads_modules, by=c("asv_core","module")) %>%
  mutate(n_p = n/n_total) %>% 
    left_join(most_abu_family_8, by=c("asv_core","Family")) %>% 
    mutate(p = case_when(p=="top" ~ Family,
                         is.na(Family) ~ ".NA",
                         is.na(p) ~ ".Other"))
library("RColorBrewer")
colors <- c("grey40","grey80","darkblue","#ffd60a",brewer.pal(n = 12, name = "Paired"))
  g <- most_abu_family_p %>% 
    group_by(asv_core,module,  p) %>% 
    summarise(n_p = sum(n_p)) %>% 
ggplot(aes(fill=p, x=module, y=n_p)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~asv_core, scales = "free_x") +
  theme_bw() +
  theme(axis.text = element_text(size = 10, color = 'black'), title = element_text(size = 14),
        strip.text = element_text(size=12, color = 'black'), 
        panel.grid = element_blank(), panel.background = element_rect(colour = "black"), legend.key.size = unit(0.5, "cm")) +
  scale_fill_manual(values=colors) +
  labs(x="Module ID", y="Relative Abundance", fill = "Family")
print(g)

cat('\n','\n')
# a=modules_table_three_groups %>% filter(asv_core != "Rare") %>% mutate(module = factor(ifelse(host_group==1,1,0))) %>%
#   count(host_ID, asv_core, module) %>% 
#   group_by(asv_core, module) %>% 
#   summarise(avg_degree = mean(n), median_degree = median(n))
#    
# modules_table_three_groups %>% filter(asv_core != "Rare") %>% mutate(module = factor(ifelse(host_group==1,1,0))) %>%
#   count(host_ID, asv_core, module) %>% 
# ggplot(aes( x=module, y=n)) +
#   geom_boxplot()+
#   facet_wrap(~asv_core)+
#   theme_bw()

2 PERMANOVA

2.1 Bray-Curtis

set.seed(123)
data_asv_mat <- data_asv %>% 
 # filter(asv_core == "Non-core") %>% 
  select(host_ID, asv_ID, reads) %>% 
  spread(asv_ID, reads, fill = 0) %>% 
  column_to_rownames("host_ID") %>% 
  as.matrix()

hosts <- small_mammals %>% 
  filter(host_ID %in% rownames(data_asv_mat)) %>% 
  select(host_ID,village,grid,season,month,mass, sex,age_repro,age_dental,elevation.obs) %>% 
  mutate(village=as.factor(village),grid=as.factor(grid), season=as.factor(season),month=as.factor(month), sex=as.factor(sex), age_dental=as.factor(age_dental)) 
  #left_join(modules_table_three_groups %>% filter(asv_core == "Non-core") %>% distinct(host_ID,host_group), by="host_ID") %>% 
  #mutate(module = factor(ifelse(host_group==1,1,0)))

distance_matrix <- vegdist(data_asv_mat, method = "bray")

# Perform PERMANOVA
permanova_result <- adonis2(distance_matrix ~ grid+season+village, data = hosts, permutations = 999)
print(knitr::kable(permanova_result[,4:5]))
F Pr(>F)
grid 2.331399 0.001
season 4.768973 0.001
village 5.666093 0.001
Residual NA NA
Total NA NA
cat('\n')
# PERMANOVA post-hoc
# perm_post_grid <- RVAideMemoire::pairwise.perm.manova(distance_matrix, fact = hosts$grid, 
#     test = "bonferroni", nperm = 999, progress = FALSE)$p.value %>% as.data.frame()
# print(knitr::kable(perm_post_grid))
# cat('\n')

#NMDS
nmds_result <- metaMDS(distance_matrix, distance = "bray", k = 2, trace = F)

# preparing data for plotting
nmds_plot <- nmds_result$points %>% 
  as.data.frame() %>%  
  rownames_to_column("host_ID") %>% 
  mutate(host_ID = as.double(host_ID)) %>% 
  left_join(hosts, by="host_ID")

# plotting
g1 <- nmds_plot %>% 
  ggplot( aes(MDS1, MDS2, color=grid, shape=village)) +
  geom_point(size = 2, position=position_jitter(.01)) +
  #stat_ellipse(aes(fill=grid), alpha=.1, type='norm',linetype =2, geom="polygon") + ##draws 95% confidence interval ellipses
  theme_bw() +
  #annotate("text", x=min(nmds_plot$MDS1)+0.04, size=3, y=max(nmds_plot$MDS2), label=paste('Stress =',round(nmds_result$stress,3))) +
  labs(x = "NMDS1", y = "NMDS2", title = "(A) Bray-Curtis")
print(g1)

2.2 UniFrac

set.seed(123)
library(GUniFrac)

rooted_phylo_tree <- phangorn::midpoint(phylo_tree)
distance_matrix <- GUniFrac(data_asv_mat, rooted_phylo_tree, verbose = F)$unifracs[, , "d_UW"]

# Perform weighted UniFrac
unifrac_result <- adonis2(distance_matrix ~ grid+season+village, data = hosts, permutations = 999)
print(knitr::kable(unifrac_result[,4:5]))
F Pr(>F)
grid 2.644989 0.001
season 6.105313 0.001
village 8.450150 0.001
Residual NA NA
Total NA NA
cat('\n')
# PERMANOVA post-hoc
# perm_post_grid <- RVAideMemoire::pairwise.perm.manova(distance_matrix, fact = hosts$grid, 
#     test = "bonferroni", nperm = 999, progress = FALSE)$p.value %>% as.data.frame()
# print(knitr::kable(perm_post_grid))
# cat('\n')

#NMDS
nmds_result2 <- metaMDS(distance_matrix, k = 2, trace = F)

# preparing data for plotting
nmds_plot2 <- nmds_result2$points %>% 
  as.data.frame() %>%  
  rownames_to_column("host_ID") %>% 
  mutate(host_ID = as.double(host_ID)) %>% 
  left_join(hosts, by="host_ID")

# plotting
g2 <- nmds_plot2 %>% 
  ggplot( aes(MDS1, MDS2, color=grid, shape=season)) +
  geom_point(size = 2, position=position_jitter(.05)) +
  #stat_ellipse(aes(fill=grid), alpha=.1, type='norm',linetype =2, geom="polygon") + ##draws 95% confidence interval ellipses
  theme_bw() +
  annotate("text", x=min(nmds_plot2$MDS1)+0.15, size=3, y=max(nmds_plot2$MDS2), label=paste('Stress =',round(nmds_result2$stress,3))) +
  labs(x = "NMDS1", y = "NMDS2", title = "(B) Weighted UniFrac")
print(g2)

2.3 Figure

# plotting
g1 + g2 + plot_layout(guides='collect') &
  theme(legend.position='bottom', legend.spacing.x=unit(0.1, 'cm'))

cat('\n','\n')

3 Microbial Groups

3.1 Genus

# taking only ASVs with known Genus
data_tax <- data_asv_tax %>% 
  distinct(host_ID, asv_core, Genus) %>% 
  filter(!is.na(Genus))

# known ASVs
asv_genus <- data_asv_tax %>% 
  distinct(asv_ID, Genus) %>%
  mutate(name = is.na(Genus)) 
  print(paste("Percentege of known ASVs:", round(table(asv_genus$name)[1]/nrow(asv_genus)*100, 2)))

[1] “Percentege of known ASVs: 41.26”

  cat('\n')
# how many Genus?
print(paste("Number of genera:", length(unique(data_tax$Genus))))

[1] “Number of genera: 119”

cat('\n')
# making unique samples (host_ID+asv_core)
data_tax_meta <- data_tax %>% 
  arrange(host_ID) %>% 
  unite(col="sample", host_ID:asv_core, remove = FALSE)
  
data_tax_meta2 <- data_tax_meta %>% distinct(sample, asv_core) 

data_tax_summary <- data_tax %>% 
  count(asv_core, Genus) %>% 
  spread(Genus, n, fill = 0)

# transforming to matrix
data_tax_mat <- data_tax_meta %>% 
  select(sample, Genus) %>% 
  mutate(n=1) %>% 
  spread(Genus, n, fill = 0) %>% 
  column_to_rownames("sample") %>% 
  as.matrix()

# Perform PERMANOVA
permanova_result <- adonis2(data_tax_mat ~ asv_core, data = data_tax_meta2, method = "jaccard", permutations = 999)
print(paste("F =", permanova_result$F[1]))

[1] “F = 202.170648540601”

cat('\n')
print(paste("p-value =", permanova_result$`Pr(>F)`[1]))

[1] “p-value = 0.001”

cat('\n')
# PCA
tax_pca <- prcomp(data_tax_mat)

# explained variance
ex_var <- tax_pca$sdev ^2 
prop_ex_var <- ex_var/sum(ex_var)*100

loadings <- as.data.frame(tax_pca$rotation[, 1:2]) %>% rownames_to_column("Genus") %>% 
  mutate(PC1_abs = abs(PC1), PC2_abs = abs(PC2))

tax_scores <- as.data.frame(tax_pca$x[,1:2]) %>% rownames_to_column("sample") %>% 
  left_join(data_tax_meta2, by="sample") 

loading_top <- loadings %>% 
  slice_max(n = 2, order_by = PC1_abs) %>% 
  bind_rows(loadings %>% slice_max(n = 2, order_by = PC2_abs)) 

g1 <- tax_scores %>% 
  ggplot(aes(PC1, PC2, color=asv_core)) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dotted", color = "grey") +
  geom_point() +
  geom_label_repel(
    data = loading_top, 
    inherit.aes = FALSE, 
    aes(x = PC1, y = PC2, label = Genus), 
    size = 3, 
    box.padding = 0.2,
    point.padding = 0.2,
    segment.colour = NA
  ) +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", size=1), panel.grid = element_blank())+
  scale_color_manual(values=group.colors) +
  labs(x = paste("PC1 (",round(prop_ex_var[1],2),"%)", sep = ""), y = paste("PC2 (",round(prop_ex_var[2],2),"%)", sep = ""), 
       title = "(A) Genus", color = "ASV Group")

3.2 Family

# taking only ASVs with known Family
data_tax <- data_asv_tax %>% 
  filter(asv_core != "All") %>%
  distinct(host_ID, asv_core, Family) %>% 
  filter(!is.na(Family))

# known ASVs
asv_genus <- data_asv_tax %>% 
  distinct(asv_ID, Family) %>%
  mutate(name = is.na(Family)) 
  print(paste("Percentege of known ASVs:", round(table(asv_genus$name)[1]/nrow(asv_genus)*100, 2)))

[1] “Percentege of known ASVs: 90.72”

  cat('\n')
# how many Family?
print(paste("Number of families:", length(unique(data_tax$Family))))

[1] “Number of families: 55”

cat('\n')
# making unique samples (host_ID+asv_core)
data_tax_meta <- data_tax %>% 
  arrange(host_ID) %>% 
  unite(col="sample", host_ID:asv_core, remove = FALSE)
  
data_tax_meta2 <- data_tax_meta %>% distinct(sample, asv_core) 

data_tax_summary <- data_tax %>% 
  count(asv_core, Family) %>% 
  spread(Family, n, fill = 0)

# transforming to matrix
data_tax_mat <- data_tax_meta %>% 
  select(sample, Family) %>% 
  mutate(n=1) %>% 
  spread(Family, n, fill = 0) %>% 
  column_to_rownames("sample") %>% 
  as.matrix()

# Perform PERMANOVA
permanova_result <- adonis2(data_tax_mat ~ asv_core, data = data_tax_meta2, method = "jaccard", permutations = 999)
print(paste("F =", permanova_result$F[1]))

[1] “F = 186.236039342593”

cat('\n')
print(paste("p-value =", permanova_result$`Pr(>F)`[1]))

[1] “p-value = 0.001”

cat('\n')
# PCA
tax_pca <- prcomp(data_tax_mat)

# explained variance
ex_var <- tax_pca$sdev ^2 
prop_ex_var <- ex_var/sum(ex_var)*100

loadings <- as.data.frame(tax_pca$rotation[, 1:2]) %>% rownames_to_column("Family") %>% 
  mutate(PC1_abs = abs(PC1), PC2_abs = abs(PC2))

tax_scores <- as.data.frame(tax_pca$x[,1:2]) %>% rownames_to_column("sample") %>% 
  left_join(data_tax_meta2, by="sample") 

loading_top <- loadings %>% 
  slice_max(n = 2, order_by = PC1_abs) %>% 
  bind_rows(loadings %>% slice_max(n = 2, order_by = PC2_abs)) 

g2 <- tax_scores %>% 
  ggplot(aes(PC1, PC2, color=asv_core)) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dotted", color = "grey") +
  geom_point() +
  geom_label_repel(
    data = loading_top, 
    inherit.aes = FALSE, 
    aes(x = PC1, y = PC2, label = Family), 
    size = 3, 
    box.padding = 0.2,
    point.padding = 0.2,
    segment.colour = NA
  ) +
  scale_x_continuous(limits = c(-2, 2.7)) +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", size=1), panel.grid = element_blank())+
  scale_color_manual(values=group.colors) +
  labs(x = paste("PC1 (",round(prop_ex_var[1],2),"%)", sep = ""), y = paste("PC2 (",round(prop_ex_var[2],2),"%)", sep = ""), 
       title = "(B) Family", color = "ASV Group")

3.3 Figure

# plotting
g1 + g2 + plot_layout(guides='collect') &
  theme(legend.position='bottom')

cat('\n','\n')

4 Network exploration

# loop for three groups
for (i in 1:2) {
  
  cat('##',core_names[i],'{.tabset}','\n','\n')
  
  cat('### ASVs degree distribution','\n')
  
  print(asv_degree_distribution_three_groups[[i]])
  cat('\n','\n')
  
  # calculating network properties
  connectance_data <- modules_table_three_groups %>% 
    filter(asv_core == core_names[i])
  
  cat('No. of hosts: ', length(unique(connectance_data$host_ID)) ,'\n','\n')
  cat('No. of ASVs: ', length(unique(connectance_data$asv_ID)) ,'\n','\n')
  
  asv_mean_degree <- connectance_data %>% distinct(asv_ID,asv_degree) %>% summarise(mean = mean(asv_degree), sd = sd(asv_degree))
  cat('ASV mean degree: ', asv_mean_degree$mean, 'ASV sd degree: ', asv_mean_degree$sd,'\n','\n')
  
  host_mean_degree <- connectance_data %>% count(host_ID) %>% summarise(mean = mean(n), sd = sd(n))
  cat('Host mean degree: ', host_mean_degree$mean, 'Host sd degree: ', host_mean_degree$sd,'\n','\n')
  
  cat('Connectance: ', nrow(connectance_data) / (length(unique(connectance_data$host_ID)) * length(unique(connectance_data$asv_ID))) ,'\n','\n')
  
  no_modules <- modules_table_three_groups %>% 
    filter(asv_core == core_names[i]) %>%
  summarise(n = n_distinct(host_group))
    cat('Number of modules: ', no_modules$n,'\n','\n')
    
  modules_sizes <- modules_table_three_groups %>% 
    filter(asv_core == core_names[i]) %>% 
  group_by(asv_core, host_group) %>% 
  summarise(n = n_distinct(host_ID)) %>% 
    summarise(mean = mean(n), sd = sd(n))
  cat('Module mean size: ', modules_sizes$mean, 'Module sd size: ', modules_sizes$sd,'\n','\n')

    
  
  cat('### Modules','\n')
  cat('The color indicates number of host individuals in the module / total number of hosts in the whole grid [%]','\n','\n')
  
  print(modules_three_groups[[i]])
  cat('\n','\n')
  
  cat('### Modules size','\n')
  print(modules_size_three_groups[[i]])
  cat('\n','\n')
  
  cat('### No. of land uses','\n')
  print(modules_grid_three_groups[[i]])
  cat('\n','\n')
}

4.1 Core

4.1.1 ASVs degree distribution

No. of hosts: 853

No. of ASVs: 103

ASV mean degree: 293.3592 ASV sd degree: 124.4865

Host mean degree: 35.42321 Host sd degree: 13.61787

Connectance: 0.3439147

Number of modules: 11

Module mean size: 77.54545 Module sd size: 116.3558

4.1.2 Modules

The color indicates number of host individuals in the module / total number of hosts in the whole grid [%]

4.1.3 Modules size

4.1.4 No. of land uses

4.2 Non-core

4.2.1 ASVs degree distribution

No. of hosts: 855

No. of ASVs: 1848

ASV mean degree: 37.37879 ASV sd degree: 33.92127

Host mean degree: 80.79064 Host sd degree: 35.91382

Connectance: 0.04371788

Number of modules: 89

Module mean size: 9.606742 Module sd size: 41.12636

4.2.2 Modules

The color indicates number of host individuals in the module / total number of hosts in the whole grid [%]

4.2.3 Modules size

4.2.4 No. of land uses

cat('## Modules','\n')

4.3 Modules

  modules_three_groups[[1]]+modules_three_groups[[2]] + plot_layout(nrow=1,ncol=2,guides='collect',axes = "collect_y",axis_titles = "collect") &
  theme(legend.position='bottom')

  cat('\n','\n')

4.4 Modules

5 Modular Structure Exploration

5.1 Modules size

5.2 Modules No. of Grids

5.3 No. of Modules

5.4 Figure

6 Phylogenetic analysis

# combining final results
assembly_final <- assembly_three_groups %>% 
  mutate(same_module = ifelse(host_group1==host_group2, "Same","Different"))


# renaming the process
assembly_final %<>% mutate(process = case_when(process=="Heterogeneous.Selection" ~ "Heterogeneous Selection",
                                               process=="Homogeneous.Selection" ~ "Homogeneous Selection",
                                               process=="Dispersal.Limitation" ~ "Dispersal Limitation",
                                               process=="Homogenizing.Dispersal" ~ "Homogenizing Dispersal",
                                               .default = "Drift"))

# summary
assembly_summary <- assembly_final %>% 
  count(asv_core, same_module, process)

assembly_summary_total <- assembly_summary %>% 
  group_by(asv_core) %>% 
  summarise(n_total = sum(n))

assembly_summary_total_module <- assembly_summary %>% 
  group_by(asv_core, same_module) %>% 
  summarise(n_total = sum(n))

#plotting process ratio between groups
g1 <- assembly_summary %>% 
  group_by(asv_core, process) %>% 
  summarise(n = sum(n)) %>% 
  left_join(assembly_summary_total, by=c("asv_core")) %>% 
  mutate(n_p = n/n_total) %>% 
  ggplot(aes(fill=process, x=asv_core, y=n_p)) +
  geom_bar(position="fill", stat="identity") +
  scale_y_continuous(labels = scales::percent) +
  geom_text(aes(label = paste0(round(n_p*100,1),"%")), 
            position = position_stack(vjust = 0.5), size = 3)+
  theme_bw() +
  theme(axis.text = element_text(size = 11, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14),
        strip.text = element_text(size=12, color = 'black'), strip.background = element_rect(color = "grey80", size = 1), 
        panel.grid = element_blank(), panel.background = element_rect(colour = "black")) +
  scale_fill_manual(values=c("#a7c957","#f2e8cf","#f07167","#83c5be")) +
  labs(x="ASVs Groups", y="Percentage")
print(g1)

#plotting process ratio between modules
g2 <- assembly_summary %>% 
  left_join(assembly_summary_total_module, by=c("asv_core","same_module")) %>% 
  mutate(n_p = n/n_total) %>% 
  ggplot(aes(fill=process, x=same_module, y=n_p)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~asv_core) +
  scale_y_continuous(labels = scales::percent) +
  geom_text(aes(label = paste0(round(n_p*100,1),"%")), 
            position = position_stack(vjust = 0.5), size = 3)+
  theme_bw() +
  theme(axis.text = element_text(size = 9, color = 'black', angle = 90, vjust = 0.5, hjust=1), title = element_text(size = 14),
        strip.text = element_text(size=12, color = 'black'), strip.background = element_rect(color = "grey80", size = 1), 
        panel.grid = element_blank(), panel.background = element_rect(colour = "black")) +
  scale_fill_manual(values=c("#a7c957","#f2e8cf","#f07167","#83c5be")) +
  labs(x="Hosts from the Same/Different Modules", y="Percentage", fill="Process")
print(g2)

7 Normalized Mutual Information (NMI)

8 Modules similarity across land use change gradient

8.1 Correlations between variables

# reading grid similarity results
grids_similarity_attr <- read_csv("../../data/data_processed/village_summary_new.csv")
rownames(grids_similarity_attr) <- grids_similarity_attr$grid_village

# correlations between variables
print(psych::pairs.panels(grids_similarity_attr %>%  select(-grid_village,-village,-grid), ellipses = F, lm = F))

NULL

cat('\n','\n')
# RDA for modules

anova_model_all <- NULL
anova_rda_all <- NULL
score_modules_top_all <- NULL
rda_figs <- list()
set.seed(123)

# loop for three asv groups
for (i in 1:2) {
  
  modules_similarity2 <- modules_similarity2_three_groups[[i]]
  grid_names <- rownames(modules_similarity2)
  # filtering matrix to the existing grids
  grids_similarity_attr2 <- grids_similarity_attr %>%  filter(grid_village %in% grid_names & !grepl("village", grid_village))
  modules_similarity2 <- modules_similarity2[rownames(grids_similarity_attr2),]
  
  n=data_asv %>% group_by(village,grid) %>% summarise(n=n_distinct(host_ID))
  grids_similarity_attr2 %<>% left_join(n, by=c("village","grid"))
  # standardizing variables to zero mean and unit variance
  grids_similarity_attr2[4:11] <- decostand(grids_similarity_attr2[4:11], 'standardize')
  
  # tb-RDA
  
  # hellinger transformation
  modules_similarity2_hell <- decostand(modules_similarity2, 'hell')
  
  
  rda_result <- rda(modules_similarity2_hell ~ veg_PC1+veg_PC2+dist_to_village+elevation+Condition(n), data=grids_similarity_attr2, na.action = na.omit)
  # **Condition(n) - control for the number of host in each village-grid.
  
  # variation explained
  constrained_eig <- t(as.data.frame(rda_result$CCA$eig/rda_result$tot.chi*100))  # for RDAs
# unconstrained_eig <- rda_result$CA$eig/rda_result$tot.chi*100
# expl_var <- c(constrained_eig, unconstrained_eig)
# barplot (expl_var[1:20], col = c(rep ('red', length (constrained_eig)), rep ('black', length (unconstrained_eig))),
#          las = 2, ylab = '% variation')
  
R2.obs <- RsquareAdj (rda_result)$adj.r.squared

# significance test for the whole model
  anova_model <- anova(rda_result)[1,] %>% 
    mutate(R2.adj = R2.obs, asv_core = core_names[i])
  
  anova_model_all <- rbind(anova_model_all, anova_model)
  
  # significance test for all variables
  anova_rda <- anova(rda_result, by = "margin", permutations = 999) %>% 
    mutate(asv_core = core_names[i])
  anova_rda$`Pr(>F)` <- p.adjust (anova_rda$`Pr(>F)`, method = 'holm')  
  
  anova_rda_all <- rbind(anova_rda_all, anova_rda)
  
  # scores
  score_modules <- as.data.frame(vegan::scores(rda_result)$species) %>% 
    mutate(RDA1_abs = abs(RDA1), RDA2_abs = abs(RDA2))
  score_modules_top <- score_modules %>% 
  slice_max(n = 5, order_by = RDA1_abs) %>% 
  bind_rows(score_modules %>% slice_max(n = 5, order_by = RDA2_abs)) %>% 
    rownames_to_column("host_group") %>% 
    mutate(asv_core = core_names[i])
  
  score_modules_top_all <- rbind(score_modules_top_all, score_modules_top)
  
  # plotting
  # samples
  rda_samples <- as.data.frame(scores(rda_result)$sites) %>% 
    rownames_to_column("grid_village") %>% 
    left_join(grids_similarity_attr2[c("grid_village","village","grid")], by="grid_village") %>% 
    mutate(asv_core = core_names[i]) %>% 
        mutate(grid = factor(grid, levels = c("semi-intact_forest","secondary_forest","brushy_regrowth","agriculture","agroforest","flooded_rice","village"))) 

  
  # variables
  rda_var <- as.data.frame(rda_result$CCA$biplot)
  rownames(rda_var) <- c("Vegetation1","Vegetation2","Distance","Elevation")
  
  # plot
  lu.color <- c("#005824","#238b45","#41ae76","#66c2a4","#99d8c9","#ccece6")
g <- rda_samples %>% 
  ggplot(aes(RDA1, RDA2, color=grid)) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "grey") +
  geom_vline(xintercept = 0, linetype = "dotted", color = "grey") +
  geom_point(size = 4) +
  geom_segment(
    data = rda_var, 
    inherit.aes = FALSE, 
    alpha = 0.8,
    aes(x = 0, y = 0, xend = (RDA1 * 0.85), yend = (RDA2 * 0.85)), 
    arrow = arrow(length = unit(0.15, "cm"))
  ) +
  geom_text_repel(
    data = rda_var, 
    inherit.aes = FALSE, 
    aes(x = RDA1, y = RDA2, label = rownames(rda_var)), 
    size = 4, 
    box.padding = 0.5,
    point.padding = 0.5,
    segment.colour = NA
  ) +
  theme_bw() +
  coord_cartesian(
    xlim = range(rda_samples$RDA1) * c(1.3, 1.2), 
    ylim = range(rda_samples$RDA2) * c(1.3, 1.2), 
    clip = 'off'
  ) +
  theme(
    panel.border = element_rect(colour = "black", size = 1), 
    panel.grid = element_blank(), aspect.ratio = 1
  ) +
  scale_color_manual(values=lu.color) +
  labs(
    x = paste("RDA1 (", round(constrained_eig[1], 2), "%)", sep = ""), 
    y = paste("RDA2 (", round(constrained_eig[2], 2), "%)", sep = ""), 
    title = paste(core_names[i]), 
    color = "Land Use", 
    shape = "Village"
  )
  rda_figs <- append(rda_figs, list(g))
}

print(knitr::kable(anova_model_all))
Df Variance F Pr(>F) R2.adj asv_core
Model 4 0.0749485 1.205524 0.203 0.0491638 Core
Model1 4 0.2375024 1.416986 0.001 0.1001372 Non-core
cat('\n','\n')
print(knitr::kable(anova_rda_all))
Df Variance F Pr(>F) asv_core
veg_PC1 1 0.0246650 1.5869155 0.540 Core
veg_PC2 1 0.0097680 0.6284648 1.000 Core
dist_to_village 1 0.0175965 1.1321409 1.000 Core
elevation 1 0.0168504 1.0841379 1.000 Core
Residual 11 0.1709699 NA NA Core
veg_PC11 1 0.0675428 1.6118954 0.024 Non-core
veg_PC21 1 0.0465410 1.1106920 0.298 Non-core
dist_to_village1 1 0.0572640 1.3665935 0.120 Non-core
elevation1 1 0.0667735 1.5935348 0.024 Non-core
Residual1 11 0.4609301 NA NA Non-core
cat('\n','\n')
rda_figs[[1]] + rda_figs[[2]] + plot_layout(ncol=2, nrow=1, guides='collect') & theme(legend.position='right')

cat('\n','\n')

8.2 Mantel test